library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(data.table)
## 
## Attaching package: 'data.table'
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, last
library(ggplot2)
library(GGally)
## 
## Attaching package: 'GGally'
## 
## The following object is masked from 'package:dplyr':
## 
##     nasa
library(tidyr)
"%&%" = function(a,b) paste(a,b,sep="")
my.dir = "/Volumes/im-lab/nas40t2/hwheeler/cross-tissue/GEUVADIS/"

Pairwise comparison of E-N R2

pops = scan(my.dir %&% 'poplist','c')
 for(alpha in c(0, 0.5,1)){ 
  for(pop in pops){
    data <- fread(my.dir %&% "GEUVADIS_" %&% pop %&% "_exp_10-foldCV_elasticNet_alpha" %&% alpha %&% "_1KGP_geuMAF0.01_all_chr1-22_2016-05-10.txt")
    R2 = data.frame(dplyr::select(data,R2)) %>% mutate(R2=ifelse(is.na(R2),0,R2)) #set NAs to zero
    colnames(R2) = pop
    if(exists("allR2") == FALSE){
      allR2 = R2
    }else{
      allR2 <- cbind(allR2, R2)
    }
  }
  print(ggpairs(allR2,lower=list(continuous="points",params=c(cex=0.7,ylim=c(0,1),xlim=c(0,1))),diag=list(continuous='blank'),title="E-N (alpha=" %&% alpha %&% ") 10-fold CV R2"))
  rm("allR2")
}

Pairwise comparison of GCTA h2

pops = scan(my.dir %&% 'poplist','c')
for(pop in pops){
    data <- fread(my.dir %&% "GEUVADIS_" %&% pop %&% "_exp_gcta_reml_local_h2_1KGP_geuMAF0.01_all_chr1-22_2016-05-02.txt")
    h2 = data.frame(dplyr::select(data,local.h2)) %>% mutate(local.h2=ifelse(is.na(local.h2),0,local.h2)) #set NAs to zero
    colnames(h2) = pop
    if(exists("allh2") == FALSE){
      allh2 = h2
    }else{
      allh2 <- cbind(allh2, h2)
    }
  }
print(ggpairs(allh2,lower=list(continuous="points",params=c(cex=0.7,ylim=c(0,1),xlim=c(0,1))),diag=list(continuous='blank'),title="h2 estimates"))

rm("allh2")

Compare E-N in YRI to other pops

pops = scan(my.dir %&% 'poplist','c')
for(alpha in c(0,0.5,1)){ 
  for(pop in pops){
    data <- fread(my.dir %&% "GEUVADIS_" %&% pop %&% "_exp_10-foldCV_elasticNet_alpha" %&% alpha %&% "_1KGP_geuMAF0.01_all_chr1-22_2016-05-10.txt")
    R2 = data.frame(dplyr::select(data,R2)) %>% mutate(R2=ifelse(is.na(R2),0,R2)) #set NAs to zero
    colnames(R2) = pop
    if(exists("allR2") == FALSE){
      allR2 = R2
    }else{
      allR2 <- cbind(allR2, R2)
    }
  }
  comp <- gather(allR2,pop,R2,-YRI) %>% mutate(diff=YRI-R2)
  print(ggplot(comp, aes(pop,diff,fill=pop,color=pop)) +  ylab("YRI R2 - pop R2") + coord_cartesian(ylim=c(-1,1)) + 
      geom_point(position=position_jitterdodge(),size=1) +
      geom_boxplot(fill="white",outlier.colour = NA, position = position_dodge()) + ggtitle("E-N alpha=" %&% alpha))
  topcomp <- filter(comp,abs(diff)>0.25)
  print(ggplot(topcomp,aes(pop,fill=pop)) + geom_bar() + ggtitle("Number of genes abs(R2 diff) > 0.25 E-N alpha=" %&% alpha))
  rm("allR2")
}

Compare E-N in ALL to other pops

pops = scan(my.dir %&% 'poplist','c')
for(alpha in c(0,0.5,1)){ 
  for(pop in pops){
    data <- fread(my.dir %&% "GEUVADIS_" %&% pop %&% "_exp_10-foldCV_elasticNet_alpha" %&% alpha %&% "_1KGP_geuMAF0.01_all_chr1-22_2016-05-10.txt")
    R2 = data.frame(dplyr::select(data,R2)) %>% mutate(R2=ifelse(is.na(R2),0,R2)) #set NAs to zero
    colnames(R2) = pop
    if(exists("allR2") == FALSE){
      allR2 = R2
    }else{
      allR2 <- cbind(allR2, R2)
    }
  }
  comp <- gather(allR2,pop,R2,-ALL) %>% mutate(diff=ALL-R2)
  print(ggplot(comp, aes(pop,diff,fill=pop,color=pop)) +  ylab("ALL R2 - pop R2") + coord_cartesian(ylim=c(-1,1)) + 
      geom_point(position=position_jitterdodge(),size=1) +
      geom_boxplot(fill="white",outlier.colour = NA, position = position_dodge()) + ggtitle("E-N alpha=" %&% alpha))
  topcomp <- filter(comp,abs(diff)>0.25)
  print(ggplot(topcomp,aes(pop,fill=pop)) + geom_bar() + ggtitle("Number of genes abs(R2 diff) > 0.25 E-N alpha=" %&% alpha))
  rm("allR2")
}

h2 vs R2

pops = scan(my.dir %&% 'poplist','c')
 for(alpha in c(0,0.5,1)){ #rerun with 0 when CEU chr1 finishes
  for(pop in pops){
    h2 = fread(my.dir %&% "GEUVADIS_" %&% pop %&% "_exp_gcta_reml_local_h2_1KGP_geuMAF0.01_all_chr1-22_2016-05-02.txt") %>% mutate(ymin = pmax(0, local.h2 - 2 * local.se), ymax = pmin(1, local.h2 + 2 * local.se), local.h2=ifelse(is.na(local.h2),0,local.h2)) #set NAs to zero
    r2 =fread(my.dir %&% "GEUVADIS_" %&% pop %&% "_exp_10-foldCV_elasticNet_alpha" %&% alpha %&% "_1KGP_geuMAF0.01_all_chr1-22_2016-05-10.txt") %>% mutate(ensid=gene,R2=ifelse(is.na(R2),0,R2)) 
    all = inner_join(h2,r2,by='ensid') %>% arrange(local.h2)
    p1<-ggplot(all,aes(x=1:nrow(all),y=local.h2,ymin=ymin, ymax=ymax) ) + geom_pointrange(col='gray')+geom_point()
    print(p1 + geom_point(data=all,aes(x=1:nrow(all),y=R2),color='red',cex=0.8) + xlab(expression("Genes sorted by h"^2)) + ylab(expression(h^{2} ~"(black) or " ~ R^{2} ~ "(red)")) +theme_bw(20) + ggtitle(pop %&% " E-N (alpha=" %&% alpha %&% ")"))
    print(cor.test(all$local.h2,all$R2))
  }
}

## 
##  Pearson's product-moment correlation
## 
## data:  all$local.h2 and all$R2
## t = 136.12, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6630025 0.6773274
## sample estimates:
##       cor 
## 0.6702274
## Warning: Removed 184 rows containing missing values (geom_segment).

## 
##  Pearson's product-moment correlation
## 
## data:  all$local.h2 and all$R2
## t = 28.737, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1747061 0.1997998
## sample estimates:
##       cor 
## 0.1872835
## Warning: Removed 2 rows containing missing values (geom_segment).

## 
##  Pearson's product-moment correlation
## 
## data:  all$local.h2 and all$R2
## t = 63.911, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3792947 0.4013379
## sample estimates:
##       cor 
## 0.3903722

## 
##  Pearson's product-moment correlation
## 
## data:  all$local.h2 and all$R2
## t = 62.951, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3742589 0.3964028
## sample estimates:
##       cor 
## 0.3853863

## 
##  Pearson's product-moment correlation
## 
## data:  all$local.h2 and all$R2
## t = 53.379, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3222230 0.3453311
## sample estimates:
##       cor 
## 0.3338272

## 
##  Pearson's product-moment correlation
## 
## data:  all$local.h2 and all$R2
## t = 29.842, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1816684 0.2066934
## sample estimates:
##       cor 
## 0.1942125

## 
##  Pearson's product-moment correlation
## 
## data:  all$local.h2 and all$R2
## t = 146.56, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6903763 0.7037449
## sample estimates:
##       cor 
## 0.6971212
## Warning: Removed 184 rows containing missing values (geom_segment).

## 
##  Pearson's product-moment correlation
## 
## data:  all$local.h2 and all$R2
## t = 71.857, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4196770 0.4408675
## sample estimates:
##       cor 
## 0.4303315
## Warning: Removed 2 rows containing missing values (geom_segment).

## 
##  Pearson's product-moment correlation
## 
## data:  all$local.h2 and all$R2
## t = 104.63, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5614162 0.5789660
## sample estimates:
##       cor 
## 0.5702562

## 
##  Pearson's product-moment correlation
## 
## data:  all$local.h2 and all$R2
## t = 98.36, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5373125 0.5555522
## sample estimates:
##       cor 
## 0.5464972

## 
##  Pearson's product-moment correlation
## 
## data:  all$local.h2 and all$R2
## t = 90.884, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5067610 0.5258337
## sample estimates:
##       cor 
## 0.5163614

## 
##  Pearson's product-moment correlation
## 
## data:  all$local.h2 and all$R2
## t = 83.875, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4762596 0.4961173
## sample estimates:
##       cor 
## 0.4862513

## 
##  Pearson's product-moment correlation
## 
## data:  all$local.h2 and all$R2
## t = 145.82, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6885286 0.7019629
## sample estimates:
##       cor 
## 0.6953065
## Warning: Removed 184 rows containing missing values (geom_segment).

## 
##  Pearson's product-moment correlation
## 
## data:  all$local.h2 and all$R2
## t = 64.517, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3824570 0.4044365
## sample estimates:
##      cor 
## 0.393503
## Warning: Removed 2 rows containing missing values (geom_segment).

## 
##  Pearson's product-moment correlation
## 
## data:  all$local.h2 and all$R2
## t = 98.514, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5379220 0.5561447
## sample estimates:
##       cor 
## 0.5470981

## 
##  Pearson's product-moment correlation
## 
## data:  all$local.h2 and all$R2
## t = 91.27, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5083899 0.5274193
## sample estimates:
##       cor 
## 0.5179687

## 
##  Pearson's product-moment correlation
## 
## data:  all$local.h2 and all$R2
## t = 84.82, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4804768 0.5002287
## sample estimates:
##       cor 
## 0.4904157

## 
##  Pearson's product-moment correlation
## 
## data:  all$local.h2 and all$R2
## t = 79.164, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4547259 0.4751097
## sample estimates:
##       cor 
## 0.4649794